# -*- coding: utf-8 -*-
import requests
import time
from bs4 import BeautifulSoup
# Scrape the chinanews.com "scroll news" listing pages 1-9 and print each
# qualifying item's category, title, absolute URL, and publish time.
# NOTE: this only works for static HTML pages; JS-rendered pages need a browser.
for page in range(1, 10):
    url = f'https://www.chinanews.com/scroll-news/news{page}.html'
    headers = {
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko Core/1.70.3877.400 QQBrowser/10.8.4506.400'
    }
    # timeout= prevents the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    if resp.status_code == 200:
        # 'zh' (the original value) is not a codec name; let requests detect
        # the page's real charset from the response body instead.
        resp.encoding = resp.apparent_encoding
        soup = BeautifulSoup(resp.text, 'lxml')
        items = soup.select('body > div#content > div#content_right > div.content_list > ul > li')
        for item in items:  # renamed from `i`, which shadowed the page counter
            # Query each cell once instead of re-running the same selector.
            type_tag = item.select_one('li > div:nth-child(1) > a')
            time_tag = item.select_one('li > div:nth-child(3)')
            title_tag = item.select_one('li > div.dd_bt > a')
            # Skip separator rows (no category link) and items at or before the
            # cutoff. NOTE(review): '10-14 23:59' is a hard-coded MM-DD HH:MM
            # lexicographic compare that ignores the year — confirm the
            # intended time window before relying on it.
            if type_tag is not None and time_tag.text > '10-14 23:59':
                news_type = type_tag.text       # news category
                news_tt = title_tag.text        # headline text
                news_href = 'https:' + title_tag.attrs['href']  # absolute link
                news_time = time_tag.text       # publish time (MM-DD HH:MM)
                print(news_type, news_tt, news_href, news_time)
    else:
        print("爬虫无效")
